#!/usr/bin/env python3
# DannyNiu/NJF, 2025-05-01. Public Domain.

import collections

# One symbol on the right-hand side of a production.
# - type:     "stoken" (quoted literal token), "vtoken" (name listed in
#             `vtokens`) or "prod" (any other name).
# - optional: set from whether the symbol name ends in "_opt".
# - value:    text written verbatim into the generated C initializer.
sym = collections.namedtuple("sym", ["type", "optional", "value"])

import io, sys

# Command line: <decl|def> <grammar-name> <lexer-header>
# `sys.exit` is used rather than the `exit` helper, which is only injected by
# the `site` module and is missing under `python -S` or in frozen builds.
if len(sys.argv) != 4:
    sys.exit("3 arguments required!")

if sys.argv[1] not in ("decl", "def"):
    sys.exit("1st argument must be 'decl' or 'def'.")
# argv[2] is grammar name,
# argv[3] is header name for lexer declarations.

# Names that the parser below classifies as "vtoken" symbols.
# NOTE(review): nothing from `vtoken_defs` is copied into `vtokens` here, and
# the fallback message says "empty set" although the set holds 'a' --
# presumably the module is meant to supply the real names; confirm.
vtokens = {'a'}
try:
    import vtoken_defs
except Exception:
    # Best effort: any ordinary failure while importing falls back to the
    # built-in set. KeyboardInterrupt/SystemExit are deliberately not caught.
    print('Import of `vtoken_defs` failed, using empty set.',
          file=sys.stderr)

# Character classes that may appear in an identifier token
# (the tokenizer additionally accepts "_", "-" and ".").
lower = "abcdefghijklmnopqrstuvwxyz"
upper = lower.upper()
digit = "0123456789"

# Current state of the grammar parser. Values used by the parser loop:
# - init
# - expect_label_lhs
# - label_lhs
# - expect_rules_start
# - expect_terms
# - expect_label_for_rule
# - expect_next_rule
fsm = "init"

# lhs:  name of the production currently being parsed.
# lhl:  label of that production; prefix of generated function names.
# s:    scratch string; only ever reset, never read.
lhs = lhl = s = ""

# prod:  symbols collected so far for the alternative being parsed.
# rules: names of all rule functions seen, in input order.
prod, rules = [], []

def shift1(stream=None):
    """Split a grammar description into tokens, yielding them one by one.

    Token kinds produced:
    - identifiers: runs of ASCII letters, digits, "_", "-" and ".";
    - S-tokens: double-quoted strings, yielded with their quotes; a
      backslash takes the following character verbatim;
    - the single punctuation characters ":", "|", ";" and "%".
    Every other character only separates tokens.

    Parameters:
        stream: text stream with a `read(n)` method. Defaults to
            `sys.stdin` when omitted or None.

    Raises:
        Exception: the input ends inside a quoted S-token.
    """
    import string

    if stream is None:
        stream = sys.stdin

    # Built once instead of re-concatenating the classes per character.
    ident_chars = frozenset(string.ascii_letters + string.digits + "_-.")
    punctuation = ":|;%"

    in_quote = False
    tok = ""
    while True:
        c = stream.read(1)
        if not c:
            # End of input.
            if in_quote:
                raise Exception("Ended in partial S-token")
            if tok:
                yield tok
            return

        if in_quote:
            tok += c
            if c == '\\':
                # Keep the escaped character so an escaped quote does not
                # terminate the S-token.
                tok += stream.read(1)
            elif c == '"':
                in_quote = False
                yield tok
                tok = ""
        elif c in ident_chars:
            tok += c
        elif c == '"':
            # Flush an identifier that directly precedes the quote so that
            # `foo"bar"` becomes two tokens rather than one malformed one.
            if tok:
                yield tok
            in_quote = True
            tok = c
        else:
            # Punctuation or separator: either way the pending token ends.
            if tok:
                yield tok
                tok = ""
            if c in punctuation:
                yield c

def Decl1Rule(lhl, lab):
    """Print the C prototype of the rule function named `<lhl>_<lab>`.

    Parameters:
        lhl: label of the production; first part of the function name.
        lab: label of the rule; second part of the function name.
    """
    prototype = f"void *{lhl}_{lab}(lalr_rule_params);"
    print(prototype)

def Emit1Rule(lhs, lhl, prod, lab):
    """Print the C definition of the rule function named `<lhl>_<lab>`.

    The printed function looks up the production `lhs` through `hRule`,
    declares a static `symbolseq[]` table with one entry per symbol in
    `prod` followed by a `{0}` terminator, and returns the result of
    `lalr_rule_actions_generic`.

    Parameters:
        lhs: production name, printed as a C string literal.
        lhl: label of the production; first part of the function name.
        prod: sequence of records with `type`, `optional` and `value`
            attributes, one per right-hand-side symbol.
        lab: label of the rule; second part of the function name.
    """
    print(f"void *{lhl}_{lab}(lalr_rule_params)")
    print("{")
    print(f'    int32_t production = hRule("{lhs}");')
    print("    static lalr_rule_symbol_t symbolseq[] = {")

    for term in prod:
        flag = "lalr_opt " if term.optional else ""
        # vtokens are stored in the `.vtype` member, everything else in
        # `.value`.
        member = "vtype" if term.type == "vtoken" else "value"
        print(f"        {{ symtype_{term.type}, .{member} = {term.value}, {flag}}},")

    print("        {0},")
    print("    };")
    print("    (void)ctx;")
    print("    return lalr_rule_actions_generic(lalr_rule_gen_args);")
    print("}")
    print()

print('/* Auto-generated by "utils/grammar2rules.py". May be adapted. */')
if sys.argv[1] == "decl":
    print('#ifndef dcc_{}_grammar_h'.format(sys.argv[2]))
    print('#define dcc_{}_grammar_h 1'.format(sys.argv[2]))
    print('#include "../lalr-common/lalr.h"')
    print('extern strvec_t *ns_rules_{};'.format(sys.argv[2]))
    print('#define hRule(s) strvec_str2i(ns_rules_{}, s)'.format(sys.argv[2]))
elif sys.argv[1] == "def":
    print('#define dcc_lalr_defining_grammar')
    print('#include "{}-grammar.h"'.format(sys.argv[2]))
    print('#include "{}"'.format(sys.argv[3]))
    print('strvec_t *ns_rules_{};'.format(sys.argv[2]))

for t in shift1():
    match fsm:

        case "init":
            if t in ":|;%" or t[0] == '"':
                raise Exception("Unexpected token: {}".format(t))
            lhs = t
            fsm = "expect_label_lhs"

        case "expect_label_lhs":
            if t != "%":
                raise Exception("Unexpected token: {}".format(t))
            fsm = "label_lhs"

        case "label_lhs":
            if t in ":|;%" or t[0] == '"':
                raise Exception("Unexpected token: {}".format(t))
            lhl = t
            fsm = "expect_rules_start"

        case "expect_rules_start":
            if t != ':':
                raise Exception("Unexpected token: {}".format(t))
            fsm = "expect_terms"

        case "expect_terms":
            if t in vtokens:
                prod += [ sym("vtoken", t.endswith("_opt"), t) ]
            elif t[0] == '"':
                prod += [ sym("stoken", False, t) ]
            elif t == '%':
                fsm = "expect_label_for_rule"
            else:
                prod += [ sym("prod", t.endswith("_opt"), '"'+t+'"') ]

        case "expect_label_for_rule":
            if t in ":|;%" or t[0] == '"':
                raise Exception("Unexpected token: {}".format(t))
            rules += [ lhl+"_"+t ]
            if sys.argv[1] == "decl":
                Decl1Rule(lhl, t)
            elif sys.argv[1] == "def":
                Emit1Rule(lhs, lhl, prod, t)
            s = ""
            prod = []
            fsm = "expect_next_rule"

        case "expect_next_rule":
            if t == '|':
                fsm = "expect_terms"
            elif t == ';':
                lhs = ""
                lhl = ""
                fsm = "init"
            else:
                raise Exception("Unexpected token: {}".format(t))

if sys.argv[1] == "decl":
    print('extern lalr_rule_t {}_grammar_rules[];'.format(sys.argv[2]))
    print('#endif /* dcc_{}_grammar_h */'.format(sys.argv[2]))
elif sys.argv[1] == "def":
    print('lalr_rule_t {}_grammar_rules[] = '.format(sys.argv[2])+"{")
    for r in rules:
        print("    {},".format(r))
    print("    NULL,\n};")
